"""Scrape a single 51job job posting and print its fields as a list.

Downloads the posting page, decodes it from GBK, and extracts the job
title, company, salary, location, education/experience requirements and
company details via XPath.  The absolute XPaths mirror 51job's markup at
the time of writing and will break if the site layout changes.
"""
import re

import requests
from lxml import html as lxml_html

# Sample posting URL.  Alternate sample:
# https://jobs.51job.com/shenzhen-nsq/124148620.html?s=01&t=0
URL = "https://jobs.51job.com/shenzhen-ftq/107789705.html?s=01&t=0"

# NOTE(review): the original sent 'Host: search.51job.com', which does not
# match the jobs.51job.com URL being fetched; requests derives the correct
# Host header from the URL automatically, so it is omitted here.
HEADERS = {
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Chrome/84.0.4147.89 Safari/537.36'),
}


def fetch_page(url, headers=None, timeout=10):
    """Download *url* and return its body decoded from GBK.

    Raises requests.HTTPError on a non-2xx response so we never try to
    parse an error page as a job posting.
    """
    response = requests.get(url, headers=headers or HEADERS, timeout=timeout)
    response.raise_for_status()
    # errors="replace" keeps the scraper alive on stray non-GBK bytes
    return response.content.decode("gbk", errors="replace")


def parse_job(html_content):
    """Extract job fields from a 51job posting page.

    Returns a list of stripped strings in the original column order:
    [company, job, education, labels, salary, company_type,
     company_scale, scope, work_year, area, requirements].
    """
    page = lxml_html.fromstring(html_content)

    def first(xpath, default=""):
        # XPath returns a list; a missing node yields the default instead
        # of the bare IndexError the original raised on layout changes.
        nodes = page.xpath(xpath)
        return nodes[0] if nodes else default

    # area / work-experience / education share one <p>; query it once.
    info = page.xpath('//div[@class="cn"]/p[2]/text()')
    area = info[0] if len(info) > 0 else ""
    workyear = info[1] if len(info) > 1 else ""
    education = info[2] if len(info) > 2 else ""

    job = first('//div[@class="tHeader tHjob"]//h1/text()')
    company = first('//p[@class="cname"]/a/text()')
    label = page.xpath('//div[@class="page"]/span/text()')
    salary = first('/html/body/div[3]/div[2]/div[2]/div/div[1]/strong/text()')
    companytype = first('/html/body/div[3]/div[2]/div[4]/div[1]/div[2]/p[1]/text()')
    company_scale = first('/html/body/div[3]/div[2]/div[4]/div[1]/div[2]/p[2]/text()')
    scope = first('/html/body/div[3]/div[2]/div[4]/div[1]/div[2]/p[3]/a[1]/text()')
    # Job-requirements text sits between the "(工作|任职)要求" heading and
    # the next <div class="mt10"> block; grab it from the raw HTML.
    require = re.findall(r'(?:工作|任职)要求(.*)<div class="mt10">', html_content, re.S)

    row = [str(company), str(job), str(education), str(label), str(salary),
           str(companytype), str(company_scale), str(scope), str(workyear),
           str(area), str(require)]
    return [s.strip() for s in row]  # strip surrounding whitespace


def main():
    """Fetch the sample posting and print its parsed field row."""
    print(parse_job(fetch_page(URL)))


if __name__ == "__main__":
    main()